import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
sys.path.append(sys.argv[1])

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, assert_series, logical_or, to_list, fetch_index

# Load the ATP match table; the directory holding the CSV is the first CLI argument.
atp_tennis = read_csv_file(os.path.join(sys.argv[1], 'atp_tennis.csv'))

# Select every match in which 'Federer R.' is listed on either side.
federer_is_player_1 = assert_series(atp_tennis['Player_1'], 'Federer R.', 'equality')
federer_is_player_2 = assert_series(atp_tennis['Player_2'], 'Federer R.', 'equality')
federer_matches = atp_tennis[logical_or(federer_is_player_1, federer_is_player_2)]

# The index of the filtered rows is used as the list of match ids.
federer_match_ids_index = fetch_index(federer_matches)
federer_match_ids = to_list(federer_match_ids_index)

print(federer_match_ids)
# pickle.dump(federer_match_ids,open("./ref_result/federer_match_ids.pkl","wb"))

import pandas as pd
import pickle
from decision_company import read_csv_file, fetch_column, logical_or, search_where, extract_unique_values, assert_series



# Matches with 'Federer R.' on either side of the draw.
federer_listed_first = assert_series(atp_tennis['Player_1'], 'Federer R.', 'equality')
federer_listed_second = assert_series(atp_tennis['Player_2'], 'Federer R.', 'equality')
federer_matches = atp_tennis[logical_or(federer_listed_first, federer_listed_second)]

# For each of those matches pick the name that is not Federer, then de-duplicate.
# NOTE(review): search_where presumably keeps Player_1 where the mask holds and
# substitutes Player_2 elsewhere -- confirm against decision_company.
player_1_names = fetch_column(federer_matches, 'Player_1')
player_1_is_opponent = assert_series(federer_matches['Player_1'], 'Federer R.', 'inequality')
opponent_per_match = search_where(player_1_names, player_1_is_opponent, federer_matches['Player_2'])
federer_opponents = extract_unique_values(opponent_per_match)

print(federer_opponents)
# pickle.dump(federer_opponents,open("./ref_result/federer_opponents.pkl","wb"))

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, logical_and, logical_or, assert_series



def calculate_performance(player, opponent, data, surface_types):
    """Summarise how ``player`` fared in matches against ``opponent``.

    Only rows of ``data`` where the two names occupy 'Player_1' and 'Player_2'
    (in either order) are considered.

    Args:
        player: Name compared against 'Player_1', 'Player_2' and 'Winner'.
        opponent: Name of the other player in the pairing.
        data: Match table with 'Player_1', 'Player_2', 'Winner' and 'Surface' columns.
        surface_types: Iterable of surface labels to break the record down by.

    Returns:
        A tuple ``(win_rate, h2h_record, surface_performance)``. The first two
        are the same value: wins divided by matches played between the pair,
        or 0 when they never met. ``surface_performance`` maps each surface to
        the same ratio restricted to that surface (0 when no matches).
    """
    # Build one mask per listing order, then combine them.
    player_listed_first = logical_and(
        assert_series(data['Player_1'], player, 'equality'),
        assert_series(data['Player_2'], opponent, 'equality'),
    )
    opponent_listed_first = logical_and(
        assert_series(data['Player_1'], opponent, 'equality'),
        assert_series(data['Player_2'], player, 'equality'),
    )
    meetings = data[logical_or(player_listed_first, opponent_listed_first)]

    played = len(meetings)
    won = len(meetings[assert_series(meetings['Winner'], player, 'equality')])
    if played > 0:
        win_rate = won / played
    else:
        win_rate = 0

    # The head-to-head record is defined by the same ratio as the win rate.
    h2h_record = win_rate

    surface_performance = {}
    for surface in surface_types:
        on_surface = meetings[assert_series(meetings['Surface'], surface, 'equality')]
        won_on_surface = len(on_surface[assert_series(on_surface['Winner'], player, 'equality')])
        played_on_surface = on_surface.shape[0]
        if played_on_surface > 0:
            surface_performance[surface] = won_on_surface / played_on_surface
        else:
            surface_performance[surface] = 0

    return win_rate, h2h_record, surface_performance


import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, create_dataframe, concatenate_objects, extract_unique_values, update_dict



# Distinct surface labels present in the match table.
surface_types = extract_unique_values(atp_tennis['Surface'])

# Result table layout: fixed columns followed by one column per surface.
base_columns = ['Player', 'Opponent', 'Win Rate', 'Head-to-Head']
surface_columns = [f'{surface} Surface Performance' for surface in surface_types]
columns = base_columns + surface_columns
player_stats = create_dataframe(data=[], columns=columns)

# One row per Federer opponent, describing 'Raonic M.' against that opponent.
for opponent in federer_opponents:
    win_rate, h2h_record, surface_performance = calculate_performance(
        'Raonic M.', opponent, atp_tennis, surface_types
    )

    row_data = {
        'Player': 'Raonic M.',
        'Opponent': opponent,
        'Win Rate': win_rate,
        'Head-to-Head': h2h_record,
    }
    surface_entries = {
        f'{surface} Surface Performance': surface_performance[surface]
        for surface in surface_types
    }
    update_dict(row_data, surface_entries)

    # Each row is built as a one-row frame (index label 0) and appended.
    opponent_row = create_dataframe(row_data, index=[0])
    player_stats = concatenate_objects(player_stats, opponent_row)

# Show the assembled table.
print(player_stats)
# pickle.dump(player_stats,open("./ref_result/player_stats.pkl","wb"))

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, concatenate_objects, bind_dataframe, reset_index, rename_columns, join_dataframes, assert_series, extract_unique_values, fetch_df_size, create_multiindex_from_product, convert_multiindex_to_dataframe, remove_labels, fill_missing_values



def calculate_head_to_head_win_rate(data):
    """Build a head-to-head win-rate table for every ordered pair of players.

    Args:
        data: Match table with 'Player_1', 'Player_2' and 'Winner' columns.

    Returns:
        A frame with columns 'Player', 'Opponent' and 'Head-to-Head Win Rate',
        holding one row per ordered pair of distinct players found in ``data``.
        Pairs with no counted matches or no counted wins get a rate of 0.

    NOTE(review): counts are keyed on (Player_1, Player_2) and wins are only
    counted where 'Winner' equals 'Player_1', so the rate for (A, B) reflects
    matches in which A was listed as Player_1 -- confirm this is intended.
    """
    unique_players = extract_unique_values(concatenate_objects(data['Player_1'], data['Player_2']))

    # Matches per (Player_1, Player_2).
    # NOTE(review): bind_dataframe/fetch_df_size presumably wrap a group-by and
    # a per-group row count whose unnamed result column is 0 -- confirm.
    total_matches = reset_index(fetch_df_size(bind_dataframe(data, ['Player_1', 'Player_2'])), drop=False)
    total_matches = rename_columns(total_matches, columns={0: 'count_total'})

    # Wins per (Player_1, Player_2), restricted to rows won by Player_1.
    player_wins = fetch_df_size(bind_dataframe(data[assert_series(data['Winner'], data['Player_1'], 'equality')], ['Player_1', 'Player_2']))
    player_wins = reset_index(player_wins, drop=False)
    player_wins = rename_columns(player_wins, {0: 'count_wins'})

    # Every ordered pair of distinct players, so pairs that never met still get a row.
    multiindex = create_multiindex_from_product([unique_players, unique_players], names=['Player', 'Opponent'])
    all_pairs = convert_multiindex_to_dataframe(multiindex, index=False)
    all_pairs = all_pairs[assert_series(all_pairs['Player'], all_pairs['Opponent'], 'inequality')]

    h2h_win_rates = remove_labels(join_dataframes(all_pairs, total_matches, left_on=['Player', 'Opponent'], right_on=['Player_1', 'Player_2'], how='left'), columns=['Player_1', 'Player_2'])
    h2h_win_rates = remove_labels(join_dataframes(h2h_win_rates, player_wins, left_on=['Player', 'Opponent'], right_on=['Player_1', 'Player_2'], how='left'), columns=['Player_1', 'Player_2'])

    # Fill each count on a named Series and assign it back. An in-place fill
    # on a temporary ``frame['col']`` selection is chained assignment, which
    # does not update the parent frame under pandas Copy-on-Write.
    count_wins = h2h_win_rates['count_wins']
    fill_missing_values(count_wins, value=0, inplace=True)
    h2h_win_rates['count_wins'] = count_wins

    # A missing total becomes 1 so the division below yields 0 rather than NaN.
    count_total = h2h_win_rates['count_total']
    fill_missing_values(count_total, value=1, inplace=True)
    h2h_win_rates['count_total'] = count_total

    h2h_win_rates['Head-to-Head Win Rate'] = h2h_win_rates['count_wins'] / h2h_win_rates['count_total']
    h2h_win_rates = remove_labels(h2h_win_rates, columns=['count_total', 'count_wins'])

    return h2h_win_rates


import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file


def estimate_probability(player_1_win_rate, player_2_win_rate, h2h_player_1_win_rate, h2h_player_2_win_rate, performance_weight=0.7, h2h_weight=0.3):
    """Blend overall and head-to-head win rates into two win probabilities.

    Each player's score is ``performance_weight * win_rate + h2h_weight *
    h2h_win_rate``; the two scores are then normalised to sum to 1.

    Args:
        player_1_win_rate: Overall win rate of player 1.
        player_2_win_rate: Overall win rate of player 2.
        h2h_player_1_win_rate: Head-to-head win rate of player 1.
        h2h_player_2_win_rate: Head-to-head win rate of player 2.
        performance_weight: Weight applied to the overall win rates.
        h2h_weight: Weight applied to the head-to-head win rates.

    Returns:
        A tuple ``(player_1_probability, player_2_probability)``. When the
        weighted scores carry no information (their total is 0, e.g. both
        weights are 0) an even ``(0.5, 0.5)`` split is returned.
    """
    # With no signal in a pair of rates, treat the players as evenly matched.
    if player_1_win_rate + player_2_win_rate == 0:
        player_1_win_rate = player_2_win_rate = 0.5

    if h2h_player_1_win_rate + h2h_player_2_win_rate == 0:
        h2h_player_1_win_rate = h2h_player_2_win_rate = 0.5

    weighted_player_1_win_rate = performance_weight * player_1_win_rate + h2h_weight * h2h_player_1_win_rate
    weighted_player_2_win_rate = performance_weight * player_2_win_rate + h2h_weight * h2h_player_2_win_rate
    total_weighted_win_rate = weighted_player_1_win_rate + weighted_player_2_win_rate

    # Guard the normalisation: a zero total would otherwise raise
    # ZeroDivisionError. Fall back to the same even split used above.
    if total_weighted_win_rate == 0:
        return 0.5, 0.5

    player_1_probability = weighted_player_1_win_rate / total_weighted_win_rate
    player_2_probability = weighted_player_2_win_rate / total_weighted_win_rate

    return player_1_probability, player_2_probability



import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, access_dataframe_loc, get_max, assert_series, is_a_null_df, visit_by_index



def update_odds(match_id, performance_weight, h2h_weight, data, h2h_win_rates):
    """Recompute the odds of both players in one match from estimated probabilities.

    Args:
        match_id: Row label of the match in ``data``.
        performance_weight: Weight of the overall win rates, forwarded to
            ``estimate_probability``.
        h2h_weight: Weight of the head-to-head win rates, forwarded to
            ``estimate_probability``.
        data: Match table with 'Player_1', 'Player_2', 'Odd_1' and 'Odd_2' columns.
        h2h_win_rates: Table with 'Player', 'Opponent' and
            'Head-to-Head Win Rate' columns.

    Returns:
        A tuple ``(updated_odd_1, updated_odd_2)``. Each value is the
        reciprocal of the player's estimated probability, or the largest value
        of the matching odds column in ``data`` when that probability is 0.

    NOTE(review): the overall win rates come from the module-level
    ``player_stats`` frame, which is not a parameter of this function.
    """
    player_1 = access_dataframe_loc(data, match_id, 'Player_1')
    player_2 = access_dataframe_loc(data, match_id, 'Player_2')

    # Overall win rates: rows of the global player_stats whose 'Opponent' is the player.
    player_1_win_rate = access_dataframe_loc(player_stats, assert_series(player_stats['Opponent'], player_1, 'equality'), 'Win Rate')
    player_2_win_rate = access_dataframe_loc(player_stats, assert_series(player_stats['Opponent'], player_2, 'equality'), 'Win Rate')

    # Use the first matching entry, or 0 when the player has no row.
    player_1_win_rate = visit_by_index(player_1_win_rate, 0) if not is_a_null_df(player_1_win_rate) else 0
    player_2_win_rate = visit_by_index(player_2_win_rate, 0) if not is_a_null_df(player_2_win_rate) else 0

    # Head-to-head rates must be read for the ordered pair. Filtering on
    # 'Opponent' alone would return the first row of *any* player who faced
    # that opponent, so narrow to the player's own rows first.
    player_1_rows = h2h_win_rates[assert_series(h2h_win_rates['Player'], player_1, 'equality')]
    player_2_rows = h2h_win_rates[assert_series(h2h_win_rates['Player'], player_2, 'equality')]

    h2h_player_1_win_rate = access_dataframe_loc(player_1_rows, assert_series(player_1_rows['Opponent'], player_2, 'equality'), 'Head-to-Head Win Rate')
    h2h_player_2_win_rate = access_dataframe_loc(player_2_rows, assert_series(player_2_rows['Opponent'], player_1, 'equality'), 'Head-to-Head Win Rate')

    h2h_player_1_win_rate = visit_by_index(h2h_player_1_win_rate, 0) if not is_a_null_df(h2h_player_1_win_rate) else 0
    h2h_player_2_win_rate = visit_by_index(h2h_player_2_win_rate, 0) if not is_a_null_df(h2h_player_2_win_rate) else 0

    player_1_probability, player_2_probability = estimate_probability(player_1_win_rate, player_2_win_rate, h2h_player_1_win_rate, h2h_player_2_win_rate, performance_weight, h2h_weight)
    max_odd_1 = get_max(data['Odd_1'])
    max_odd_2 = get_max(data['Odd_2'])

    # The reciprocal is undefined at probability 0, so cap at the largest odd seen in the data.
    if player_1_probability == 0:
        updated_odd_1 = max_odd_1
    else:
        updated_odd_1 = 1 / player_1_probability

    if player_2_probability == 0:
        updated_odd_2 = max_odd_2
    else:
        updated_odd_2 = 1 / player_2_probability

    return updated_odd_1, updated_odd_2